
use coal_est_remaining_resources1990-2014.dta, clear

* Calculating the value of the coal reserves variable for 1986-1998
* empty pretreatment dataset to merge with later
preserve 
	bysort name (year): gen n1 = _n
	keep if n1 <= 5
	replace year = 1985 if n1 == 1
	replace year = 1986 if n1 == 2
	replace year = 1987 if n1 == 3
	replace year = 1988 if n1 == 4
	replace year = 1989 if n1 == 5
	keep name year fips
	tempfile pretreatment
	save `pretreatment'
restore

* (a) assign all years 1986 - 1998 the average of the coal reserves for 1990 - 1998 for each county
preserve 
	* take average
	keep if year >= 1990 & year <= 1998
	egen average_coal = mean(coal), by(name)
	bysort name (year): gen n1 = _n
	keep if n1 == 1
	drop coal
	keep name average_coal
	
	merge 1:m name using `pretreatment'
	drop _m
	tempfile coal_average
	save `coal_average'
restore

* (b) take the 1990 value for coal reserves and assign it to all years 1986-1990
preserve
	keep if year == 1990
	drop year
	
	joinby using `pretreatment'
	
	rename coal coal_1990
	tempfile coal1990
	save `coal1990'
restore

use `coal_average', clear
append using coal_est_remaining_resources1990-2014
joinby name fips year using `coal1990', unmatched(both)
drop _m

*Fill in 1991-1998 with average for 1990-1998
bysort name: egen tmp1=mean(average_coal)
replace average_coal = tmp1 if average_coal == .

*Fill in 1991-1998 with actual coal reserve values
replace coal_1990 = coal if coal_1990 == .

*---
* Merge in coal price and index data 
preserve
// CPIU data
import delimited using "CWSR0000SA0.csv", clear
gen year_string = substr(date, 1, 4)
gen int year = real(year_string)
rename cws~19850101 cpiu_1985
drop date year_string
tempfile cpiu_temp
save `cpiu_temp'

// PPI data
import delimited using "PCU21212121.csv", clear
gen year_string = substr(date, -2, 2)
gen int year = real(year_string)
replace year = 1900 + year if year>=85
replace year = 2000 + year if year<85
rename pcu* ppi_1985
drop date year_string
tempfile ppi_temp
save `ppi_temp'
restore

* Merge coal prices
merge m:1 year using "`cpiu_temp'"
drop _merge

merge m:1 year using "`ppi_temp'"
drop _merge
drop if fips == . 

order name year fips
sort name year

* Calculate real coal prices
gen coal_price_index = ppi_1985 / cpiu_1985

* multiply each by the level of coal prices
gen coal_reserve_price_1990 = coal_1990 * coal_price_index
gen coal_reserve_price_avg = average_coal * coal_price_index

* Create logged coal value variable
gen log_coal_reserve_price_1990=ln(coal_reserve_price_1990)
gen log_coal_reserve_price_avg=ln(coal_reserve_price_avg)

* create two lags of each variable

*Levels
xtset fips year
gen lag1_coal_price_1990=l.coal_reserve_price_1990
gen lag2_coal_price_1990=l2.coal_reserve_price_1990

gen lag1_coal_price_avg=l1.coal_reserve_price_avg
gen lag2_coal_price_avg=l2.coal_reserve_price_avg

*Logs
gen lag1_logcoal_price_1990=l.log_coal_reserve_price_1990
gen lag2_logcoal_price_1990=l2.log_coal_reserve_price_1990

gen lag1_logcoal_price_avg=l1.log_coal_reserve_price_avg
gen lag2_logcoal_price_avg=l2.log_coal_reserve_price_avg

*Fix capitalization in name for McLean county
replace name="McLean" if name=="Mclean"

gen str3 county=string(fips,"%03.0f")
gen countyfip="21"+county
destring countyfip, replace
order countyfip

* Save DTA file
save cntycoaldata.dta, replace

